/********************************************************************
Name: Finding Control Group Size That Minimizes Forward 
	Prediction Error, Fixed Effects (Figure A4)
Author: Dan Thompson
Date: September 2021
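Description: For each control-pool size i (observations with rank <= i,
	for i = 25 up to the maximum rank), regresses rep_vs on a 1976 placebo
	treatment dummy with year and rank fixed effects, stores the absolute
	value of the estimate, and plots it against the pool size.
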
********************************************************************/

* Start from an empty session
clear all

* Raise the matrix-size and variable-count limits before any data are loaded
set matsize 11000
set maxvar 15000

* Root folder of the project; every file path below is built from $path
global path "~/Dropbox/Mariel Effects"

* Load the county analysis data and keep only years before 1980
use "$path/Replication/data/county_analysis_data.dta", clear
keep if year<1980

* Placebo treatment dummy: 1 for treated observations in 1976, 0 otherwise.
* Stata evaluates a missing value as true inside a logical expression, so a
* bare reference to treat would code observations with missing treatment
* status as treated in 1976. Test treat against zero explicitly and leave
* placebo missing when treat is missing, so those observations drop out of
* the regressions instead of being silently misclassified.
gen placebo = (year==1976 & treat!=0) if !missing(treat)

* The largest rank value determines how many control-pool sizes are tried
summarize rank
local max_pool = r(max)

* One row per pool size; rows stay missing until an estimate is written
matrix F = J(`max_pool', 1, .)

* Re-estimate the placebo effect on successively larger control pools,
* starting with the 25 lowest-ranked units and adding one rank at a time
forvalues k = 25/`max_pool' {
	quietly reghdfe rep_vs placebo if rank <= `k', absorb(year rank)
	matrix F[`k', 1] = _b[placebo]

	* Progress message every 100 pool sizes
	if mod(`k', 100) == 0 {
		display "`k' of `max_pool'"
	}
}

* Copy the estimates into the dataset: row k of F lands in observation k,
* so the observation number is the pool size
matrix colnames F = effect
svmat F, names(col)
keep effect
generate num_counties = _n

* Pool sizes below 25 were never estimated; discard their empty rows
drop if effect == .

* The true placebo effect is zero, so the absolute estimate is the size of
* the prediction error
generate abs_effect = abs(effect)

* Store the results for the figure
save "$path/Replication/modified_data/prediction_fe.dta", replace


* Figure A4: absolute placebo prediction error against control-pool size,
* with dashed reference lines at 250, 500 and 1500 counties
use "$path/Replication/modified_data/prediction_fe.dta", clear
twoway line abs_effect num_counties, lcolor(gs2) lwidth(vthin) ///
	ytitle("Abs(Prediction Error) (1976)") ///
	xtitle("Number of Counties") ///
	xline(250 500 1500, lcolor(gs8) lpattern(shortdash)) ///
	graphregion(color(white)) ///
	name(optimize_counties_fe, replace)

* Write the named graph to PDF
graph export "$path/Replication/output/prediction_fe.pdf", ///
	name(optimize_counties_fe) replace
